library(ggplot2)
library(reshape2)
library(dplyr)
library(tidyr)
library(GGally)
library(grid)
library(calibrate)
library(knitr)
"%&%" = function(a,b) paste(a,b,sep="")
se <- function(x) sqrt(var(x,na.rm=TRUE)/length(is.na(x)==FALSE))
source('/Volumes/im-lab/nas40t2/hwheeler/PrediXcan_CV/GTEx_2014-06013_release/transfers/PrediXmod/Paper_plots/multiplot.R')
my.dir <- '/Volumes/im-lab/nas40t2/hwheeler/cross-tissue/'
fig.dir <- '~/GitHub/GenArch/GenArchPaper/Figures/'
rna.dir <- my.dir %&% "gtex-rnaseq/"
annot.dir <- my.dir %&% "gtex-annot/"
out.dir <- rna.dir %&% "ind-tissues-RPKM/"
##read in SE calculated from 100 perms - Price et al. method
setable <- read.table(my.dir %&% "SE_estimate_from_h2_localonly_reml-no-constrain_allgenes_100perms.txt",header=T)
dgn <- read.table(my.dir %&% 'expArch_DGN-WB_imputedGTs/DGN-WB.h2.all.models_FHSfdr0.05.Chr1-22_globalAll_reml-no-constrain.2015-12-15.txt',header=T)
tislist <- scan(my.dir %&% 'gtex-rnaseq/ind-tissues-from-nick/GTEx_PrediXmod.tissue.list',sep="\n",what="character")
tisspacelist <- scan(my.dir %&% '40.tissue.list',sep="\n",what="character")
table1 <- matrix(0,nrow=length(tislist)+2,ncol=6)
n <- dgn$N[1]
numexpgenes <- dim(dgn)[1]
#numexpgenes <- length(dgn$local.p[is.na(dgn$local.p)==FALSE])
meanh2 <- sprintf("%.3f",mean(dgn$local.h2,na.rm=TRUE))
seperm <- setable$se[1]
meanandse <- meanh2 %&% " (" %&% seperm %&% ")"
pest <- dgn %>% mutate(pchi=pchisq((local.h2/local.se)^2, df=1, lower.tail=FALSE)) %>% mutate(local.q=p.adjust(pchi,method="BH")) %>% arrange(local.h2) %>% mutate(Qlt05=local.q<0.1)
propsig <- sprintf("%.1f",table(pest$Qlt05)[2]/sum(table(pest$Qlt05,useNA="i"))*100)
numsig <- table(pest$Qlt05)[2]
table1[1,] <- c("DGN Whole Blood",n,meanandse,propsig,numsig,numexpgenes)
hist(pest$local.p,main="DGN")

hist(pest$pchi,main="DGN")

#add cross-tissue to table 1
ct <- read.table(my.dir %&% 'cross-tissue.h2.all.models_FHSfdr0.05.Chr1-22_globaleQTLOtherChr_reml-no-constrain.2015-12-14.txt',header=T)
n <- ct$N[1]
numexpgenes<-dim(ct)[1]
numexpgenes <- length(ct$local.p[is.na(ct$local.p)==FALSE])
meanh2 <- sprintf("%.3f",mean(ct$local.h2,na.rm=TRUE))
seperm <- setable$se[2]
meanandse <- meanh2 %&% " (" %&% seperm %&% ")"
pest <- ct %>% mutate(pchi=pchisq((local.h2/local.se)^2, df=1, lower.tail=FALSE)) %>% mutate(local.q=p.adjust(pchi,method="BH")) %>% arrange(local.h2) %>% mutate(Qlt05=local.q<0.1)
propsig <- sprintf("%.1f",table(pest$Qlt05)[2]/sum(table(pest$Qlt05,useNA="n"))*100)
numsig <- table(pest$Qlt05)[2]
table1[2,] <- c("Cross-tissue",n,meanandse,propsig,numsig,numexpgenes)
hist(pest$local.p,main="CT")

hist(pest$pchi,main="CT")

##calc mean global for DGN
signif(mean(dgn$loc.jt.h2,na.rm=TRUE),3)
## [1] 0.143
signif(se(dgn$loc.jt.h2),3)
## [1] 0.00153
signif(mean(dgn$glo.jt.h2,na.rm=TRUE),3)
## [1] 0.034
signif(se(dgn$glo.jt.h2),3)
## [1] 0.00239
pest <- dgn %>% mutate(glo.jt.P=pchisq((glo.jt.h2/glo.jt.se)^2, df=1, lower.tail=FALSE)) %>% mutate(glo.jt.Q=p.adjust(glo.jt.P,method="BH")) %>% arrange(glo.jt.h2) %>% mutate(Qlt05=glo.jt.Q<0.1)
propsig <- table(pest$Qlt05)[2]/sum(table(pest$Qlt05,useNA="i"))*100
propsig
## TRUE
## 3.931127
table(pest$Qlt05)
##
## FALSE TRUE
## 9692 500
##prop loc
signif(mean(dgn$loc.jt.h2,na.rm=TRUE),3)/(signif(mean(dgn$loc.jt.h2,na.rm=TRUE),3)+signif(mean(dgn$glo.jt.h2,na.rm=TRUE),3))
## [1] 0.8079096
for(i in 1:length(tislist)){
tis <- tislist[i]
data <- read.table(my.dir %&% 'GTEx.TW.' %&% tis %&% '.h2.all.models_FHSfdr0.05.Chr1-22_globaleQTLOtherChr_reml-no-constrain.2015-12-14.txt',header=T,sep="\t")
explist <- scan(out.dir %&% tis %&% ".meanRPKMgt0.1_3samplesRPKMgt0_genelist","c")
data <- dplyr::filter(data,ensid %in% explist)
n <- data$N[1]
numexpgenes <- dim(data)[1]
#numexpgenes <- length(data$local.p[is.na(data$local.p)==FALSE]) ##num expressed genes mean(RPKM)>0.1
meanh2 <- sprintf("%.3f",mean(data$local.h2,na.rm=TRUE))
seperm <- setable$se[i+2]
pest <- data %>% mutate(pchi=pchisq((local.h2/local.se)^2, df=1, lower.tail=FALSE)) %>% mutate(local.q=p.adjust(pchi,method="BH")) %>% arrange(local.h2) %>% mutate(Qlt05=local.q<0.1)
propsig <- sprintf("%.1f",table(pest$Qlt05)[2]/sum(table(pest$Qlt05,useNA="i"))*100)
numsig <- table(pest$Qlt05)[2]
tisspace <- tisspacelist[i]
meanandse <- meanh2 %&% " (" %&% seperm %&% ")"
tableinfo <- c(tisspace,n,meanandse,propsig,numsig,numexpgenes)
table1[i+2,] <- tableinfo
hist(pest$local.p,main=tis)
hist(pest$pchi,main=tis)
}
















































































colnames(table1)=c("tissue","n","mean h2 (SE)","% FDR<0.1","num FDR<0.1","num expressed")
#table1
library(xtable)
## Warning: package 'xtable' was built under R version 3.2.3
tab <- xtable(table1)
print(tab, type="latex",include.rownames=FALSE)
## % latex table generated in R 3.2.1 by xtable 1.8-2 package
## % Tue Aug 16 11:00:12 2016
## \begin{table}[ht]
## \centering
## \begin{tabular}{llllll}
## \hline
## tissue & n & mean h2 (SE) & \% FDR$<$0.1 & num FDR$<$0.1 & num expressed \\
## \hline
## DGN Whole Blood & 922 & 0.149 (0.0039) & 58.8 & 7474 & 12719 \\
## Cross-tissue & 450 & 0.062 (0.017) & 20.2 & 2995 & 14861 \\
## Adipose - Subcutaneous & 298 & 0.038 (0.028) & 18.8 & 2666 & 14205 \\
## Adrenal Gland & 126 & 0.043 (0.018) & 17.5 & 2479 & 14150 \\
## Artery - Aorta & 198 & 0.042 (0.024) & 17.2 & 2385 & 13844 \\
## Artery - Coronary & 119 & 0.037 (0.048) & 16.5 & 2326 & 14127 \\
## Artery - Tibial & 285 & 0.042 (0.02) & 18.4 & 2489 & 13504 \\
## Brain - Anterior cingulate cortex (BA24) & 72 & 0.028 (0.036) & 15.4 & 2235 & 14515 \\
## Brain - Caudate (basal ganglia) & 100 & 0.037 (0.047) & 17.2 & 2521 & 14632 \\
## Brain - Cerebellar Hemisphere & 89 & 0.049 (0.068) & 16.0 & 2294 & 14295 \\
## Brain - Cerebellum & 103 & 0.050 (0.06) & 18.3 & 2646 & 14491 \\
## Brain - Cortex & 96 & 0.045 (0.057) & 17.7 & 2593 & 14689 \\
## Brain - Frontal Cortex (BA9) & 92 & 0.038 (0.077) & 16.6 & 2416 & 14554 \\
## Brain - Hippocampus & 81 & 0.037 (0.05) & 16.4 & 2376 & 14513 \\
## Brain - Hypothalamus & 81 & 0.017 (0.043) & 15.2 & 2238 & 14759 \\
## Brain - Nucleus accumbens (basal ganglia) & 93 & 0.029 (0.04) & 17.1 & 2498 & 14601 \\
## Brain - Putamen (basal ganglia) & 82 & 0.032 (0.064) & 17.3 & 2495 & 14404 \\
## Breast - Mammary Tissue & 183 & 0.029 (0.037) & 17.0 & 2496 & 14700 \\
## Cells - EBV-transformed lymphocytes & 115 & 0.058 (0.1) & 16.6 & 2066 & 12454 \\
## Cells - Transformed fibroblasts & 272 & 0.051 (0.031) & 20.0 & 2552 & 12756 \\
## Colon - Sigmoid & 124 & 0.033 (0.019) & 16.0 & 2298 & 14321 \\
## Colon - Transverse & 170 & 0.036 (0.058) & 16.3 & 2398 & 14676 \\
## Esophagus - Gastroesophageal Junction & 127 & 0.032 (0.039) & 16.1 & 2270 & 14125 \\
## Esophagus - Mucosa & 242 & 0.042 (0.03) & 17.1 & 2432 & 14239 \\
## Esophagus - Muscularis & 219 & 0.039 (0.015) & 17.7 & 2493 & 14047 \\
## Heart - Atrial Appendage & 159 & 0.042 (0.03) & 16.8 & 2327 & 13892 \\
## Heart - Left Ventricle & 190 & 0.034 (0.034) & 16.5 & 2203 & 13321 \\
## Liver & 98 & 0.033 (0.062) & 16.5 & 2230 & 13553 \\
## Lung & 279 & 0.032 (0.034) & 17.0 & 2509 & 14775 \\
## Muscle - Skeletal & 361 & 0.033 (0.03) & 17.9 & 2301 & 12833 \\
## Nerve - Tibial & 256 & 0.052 (0.087) & 20.6 & 2992 & 14510 \\
## Ovary & 85 & 0.037 (0.094) & 17.2 & 2418 & 14094 \\
## Pancreas & 150 & 0.047 (0.024) & 17.2 & 2398 & 13941 \\
## Pituitary & 87 & 0.038 (0.055) & 16.6 & 2527 & 15183 \\
## Skin - Not Sun Exposed (Suprapubic) & 196 & 0.041 (0.034) & 17.0 & 2490 & 14642 \\
## Skin - Sun Exposed (Lower leg) & 303 & 0.039 (0.016) & 18.4 & 2687 & 14625 \\
## Small Intestine - Terminal Ileum & 77 & 0.036 (0.053) & 17.4 & 2591 & 14860 \\
## Spleen & 89 & 0.059 (0.061) & 17.0 & 2451 & 14449 \\
## Stomach & 171 & 0.032 (0.025) & 16.1 & 2338 & 14531 \\
## Testis & 157 & 0.054 (0.044) & 17.8 & 3009 & 16936 \\
## Thyroid & 279 & 0.044 (0.066) & 19.4 & 2838 & 14642 \\
## Whole Blood & 339 & 0.033 (0.023) & 18.6 & 2260 & 12160 \\
## \hline
## \end{tabular}
## \end{table}
print(kable(tab))
##
##
## tissue n mean h2 (SE) % FDR<0.1 num FDR<0.1 num expressed
## ------------------------------------------ ---- --------------- ---------- ------------ --------------
## DGN Whole Blood 922 0.149 (0.0039) 58.8 7474 12719
## Cross-tissue 450 0.062 (0.017) 20.2 2995 14861
## Adipose - Subcutaneous 298 0.038 (0.028) 18.8 2666 14205
## Adrenal Gland 126 0.043 (0.018) 17.5 2479 14150
## Artery - Aorta 198 0.042 (0.024) 17.2 2385 13844
## Artery - Coronary 119 0.037 (0.048) 16.5 2326 14127
## Artery - Tibial 285 0.042 (0.02) 18.4 2489 13504
## Brain - Anterior cingulate cortex (BA24) 72 0.028 (0.036) 15.4 2235 14515
## Brain - Caudate (basal ganglia) 100 0.037 (0.047) 17.2 2521 14632
## Brain - Cerebellar Hemisphere 89 0.049 (0.068) 16.0 2294 14295
## Brain - Cerebellum 103 0.050 (0.06) 18.3 2646 14491
## Brain - Cortex 96 0.045 (0.057) 17.7 2593 14689
## Brain - Frontal Cortex (BA9) 92 0.038 (0.077) 16.6 2416 14554
## Brain - Hippocampus 81 0.037 (0.05) 16.4 2376 14513
## Brain - Hypothalamus 81 0.017 (0.043) 15.2 2238 14759
## Brain - Nucleus accumbens (basal ganglia) 93 0.029 (0.04) 17.1 2498 14601
## Brain - Putamen (basal ganglia) 82 0.032 (0.064) 17.3 2495 14404
## Breast - Mammary Tissue 183 0.029 (0.037) 17.0 2496 14700
## Cells - EBV-transformed lymphocytes 115 0.058 (0.1) 16.6 2066 12454
## Cells - Transformed fibroblasts 272 0.051 (0.031) 20.0 2552 12756
## Colon - Sigmoid 124 0.033 (0.019) 16.0 2298 14321
## Colon - Transverse 170 0.036 (0.058) 16.3 2398 14676
## Esophagus - Gastroesophageal Junction 127 0.032 (0.039) 16.1 2270 14125
## Esophagus - Mucosa 242 0.042 (0.03) 17.1 2432 14239
## Esophagus - Muscularis 219 0.039 (0.015) 17.7 2493 14047
## Heart - Atrial Appendage 159 0.042 (0.03) 16.8 2327 13892
## Heart - Left Ventricle 190 0.034 (0.034) 16.5 2203 13321
## Liver 98 0.033 (0.062) 16.5 2230 13553
## Lung 279 0.032 (0.034) 17.0 2509 14775
## Muscle - Skeletal 361 0.033 (0.03) 17.9 2301 12833
## Nerve - Tibial 256 0.052 (0.087) 20.6 2992 14510
## Ovary 85 0.037 (0.094) 17.2 2418 14094
## Pancreas 150 0.047 (0.024) 17.2 2398 13941
## Pituitary 87 0.038 (0.055) 16.6 2527 15183
## Skin - Not Sun Exposed (Suprapubic) 196 0.041 (0.034) 17.0 2490 14642
## Skin - Sun Exposed (Lower leg) 303 0.039 (0.016) 18.4 2687 14625
## Small Intestine - Terminal Ileum 77 0.036 (0.053) 17.4 2591 14860
## Spleen 89 0.059 (0.061) 17.0 2451 14449
## Stomach 171 0.032 (0.025) 16.1 2338 14531
## Testis 157 0.054 (0.044) 17.8 3009 16936
## Thyroid 279 0.044 (0.066) 19.4 2838 14642
## Whole Blood 339 0.033 (0.023) 18.6 2260 12160
Calculate num expressed genes and make lists per tissue for filtering
- Expressed: mean RPKM > 0.1 and at least 3 samples with RPKM > 0
tislist <- scan(my.dir %&% 'tissue.list',sep="\n",what="character")
expidlist <- scan(rna.dir %&% "GTEx_Analysis_2014-06-13.RNA-seq.ID.list","character")
expgenelist <- scan(rna.dir %&% "GTEx_Analysis_2014-06-13.RNA-seq.GENE.list","character")
exp <- readRDS(rna.dir %&% "GTEx_Analysis_2014-06-13.RNA-seq.GENExID.RDS")
expdata <- matrix(exp, ncol=length(expidlist), byrow=T)
t.expdata <- t(expdata)
rownames(t.expdata) <- expidlist
colnames(t.expdata) <- expgenelist
gencodefile <- annot.dir %&% "gencode.v18.genes.patched_contigs.summary.protein"
gencode <- read.table(gencodefile)
rownames(gencode) <- gencode[,5]
t.expdata <- t.expdata[,intersect(colnames(t.expdata),rownames(gencode))] ###pull protein coding gene expression data
sam <- read.table(annot.dir %&% "GTEx_Analysis_2014-06-13.SampleTissue.annot",header=T,sep="\t")
for(i in 1:length(tislist)){
tissue <- tislist[i]
tis<- gsub(' ','',tissue) ##removes all whitespace to match .RDS files
sample <- subset(sam,SMTSD == tissue) ### pull sample list of chosen tissue
tissue.exp <- t.expdata[intersect(rownames(t.expdata),sample$SAMPID),] ###pull expression data for chosen tissue###
tissue.exp <- t(tissue.exp) #for merging in R
explist <- subset(rowMeans(tissue.exp), rowMeans(tissue.exp)>0.1) ###pull genes with mean expression > 0.1###
explist <- names(explist)
nz.expdata <- tissue.exp[explist,]
#calc 10% of sample size
tenpercent <- round(0.1*dim(nz.expdata)[2])
rowtable<-function(x) table(x>0)[[1]] > 2 ##function to determine if >2 samples have exp levels >0
nonbin<-apply(nz.expdata,1,rowtable) ##apply to matrix
gt2.expdata <- nz.expdata[nonbin,] ##remove genes with >10% of RPKM's==0
write.table(rownames(gt2.expdata),file=out.dir %&% tis %&% ".meanRPKMgt0.1_3samplesRPKMgt0_genelist",quote=FALSE,col.names = FALSE,row.names=FALSE)
cat(tis,":",dim(gt2.expdata)[1],"genes\n")
}